Contents

%run set_theme.ipynb
%%html
<style>
/* Flat, borderless, 7px-high fill for elements matching the slider rail
   selector. Presumably targets the rail of the Plotly slider rendered by
   the figure in this notebook - TODO confirm against the rendered DOM.
   !important is used so these rules win over competing declarations. */
.slider-container .slider-rail-rect {
    fill: #d8bea1 !important;
    stroke-width: 0 !important;
    height: 7px !important;
}
/* Dark, borderless fill with a soft drop shadow for elements matching the
   slider grip selector (presumably the draggable handle - TODO confirm). */
.slider-grip-rect {
    fill: #343a42 !important;
    stroke-width: 0 !important;
    filter: drop-shadow(0 0 3px rgba(0, 0, 0, 0.3));
}
</style>
import pandas as pd
import plotly.graph_objs as go
import plotly.colors as pc
from plotly.offline import init_notebook_mode

# Initialise Plotly's offline notebook mode before any figure is shown.
init_notebook_mode()

# Load the dataset from the parquet file.
df = pd.read_parquet('../data/SO_2014_2022.pq')

# Keep only rows whose salary lies strictly between 0 and 250,000.
has_positive_salary = df['Salary'] > 0
below_salary_cap = df['Salary'] < 250000
df = df[has_positive_salary & below_salary_cap]

# Preview the first rows of the filtered data.
df.head()
Year Salary JobSat YearsCode YearsCodePro Age Education OrgSize LastNewJob Employment RespondentType JobSeek Gender Student Country CodingActivities DevType LearnCodeFrom LangPresent
0 2022 69318.0 <NA> 10 5 25-34 master 500 to 999 employees <NA> fulltime dev <NA> male no Germany School or academic work Data scientist or machine learning specialist;... Books / Physical media;School (i.e., Universit... C;C++;Java;JavaScript;MATLAB;Python;Scala;SQL;...
6 2022 27652.0 <NA> 18 10 25-34 bachelor 1,000 to 4,999 employees <NA> fulltime dev <NA> male no Colombia Hobby Developer, full-stack;Developer, back-end Books / Physical media;Other online resources ... Bash/Shell/PowerShell;Elixir;HTML/CSS;JavaScri...
9 2022 15431.0 <NA> 5 5 25-34 bachelor 20 to 99 employees <NA> fulltime dev <NA> male no Ghana Freelance/contract work Developer, back-end On the job training;Coding Bootcamp JavaScript;Ruby
13 2022 47352.0 <NA> 7 7 45-54 master 10 to 19 employees <NA> fulltime non-dev <NA> male no Belgium Hobby Developer, back-end;Educator or academic;Datab... Books / Physical media;On the job training;Col... Delphi;SQL
22 2022 78084.0 <NA> 25 25 45-54 bachelor 500 to 999 employees <NA> fulltime non-dev <NA> male no Canada Hobby;Contribute to open-source projects Engineer, site reliability;Security professional Books / Physical media;Other online resources ... Bash/Shell/PowerShell;C;JavaScript;Perl;PHP;Py...
def get_salary_gap_by_age(age_range: str) -> pd.DataFrame:
    """Compute the per-country salary gap between men and women for one age range.

    Reads the module-level ``df`` and uses its ``Gender``, ``Age``,
    ``Country`` and ``Salary`` columns.

    Args:
        age_range: Value of the ``Age`` column to select, interpolated into
            the query string as a double-quoted literal.

    Returns:
        A DataFrame with one row per country that has data for both genders
        (the merge is an inner join) and the columns ``Country``,
        ``SalaryMen``, ``SalaryWomen``, ``SalaryGap`` (absolute difference of
        the two means) and ``SalaryGapPercent`` (signed difference as a
        percentage of the men's mean; positive when the men's mean is higher).
    """

    def mean_salary_per_country(gender: str, column: str) -> pd.DataFrame:
        # Mean salary per country for one gender within the requested age range.
        subset = df.query(f'Gender == "{gender}" & Age == "{age_range}"')
        per_country = subset.groupby(['Country'], as_index=False).agg({'Salary': 'mean'})
        return per_country.rename(columns={'Salary': column})

    men = mean_salary_per_country('male', 'SalaryMen')
    women = mean_salary_per_country('female', 'SalaryWomen')

    # Inner join: countries missing either gender are dropped.
    gap_df = pd.merge(men, women, on='Country')

    difference = gap_df['SalaryMen'] - gap_df['SalaryWomen']
    gap_df['SalaryGap'] = difference.abs()
    gap_df['SalaryGapPercent'] = difference / gap_df['SalaryMen'] * 100

    return gap_df


def make_title(age_range: str) -> str:
    """Return the figure title for ``age_range``: a headline followed by an HTML subtitle."""
    headline = f'Global Salary Gap Distribution Between Men and Women ({age_range} years old)'
    # The subtitle is wrapped in <sup> after a line break so it appears as a second line.
    subtitle = '<br><sup>In most countries, a neutral or female-favoured pay gap turns male favoured at ages 35+</sup>'
    return headline + subtitle
# Collect the distinct, non-missing age ranges in sorted order.
age_bins = df['Age'].unique().dropna().sort_values()

# Settings shared by every map are defined once, outside the loop.
# With z limited to [-100, 100], the colour stops at 0.47 and 0.53 correspond
# to gaps of -6% and +6%: pink below, green in between, blue above.
gap_colorscale = [
    [0, '#f222e5'], [0.47, '#f283eb'],
    [0.4701, '#d7f2b2'], [0.5299, '#b2f2c6'],
    [0.53, '#9893ff'], [1, '#362cff'],
]
# Horizontal colour bar, centred near the top of the plot, with three
# descriptive tick labels instead of numeric ones.
colorbar_settings = dict(
    x=0.5,
    y=0.92,
    xref='paper',
    yref='paper',
    xanchor='center',
    yanchor='bottom',
    orientation='h',
    len=0.5,
    thickness=10,
    tickmode='array',
    title='',
    tickvals=[-100, 0, 100],
    ticktext=['female-favoured', 'neutral', 'male-favoured'],
)
gap_hovertemplate = '<b>%{location}</b><br>Salary gap: %{z:.1f}%<extra></extra>'

# Build one choropleth map per age range; only the first starts out visible.
traces = []
for i, age_range in enumerate(age_bins):
    age_salary_gap_df = get_salary_gap_by_age(age_range)
    trace = go.Choropleth(
        locations=age_salary_gap_df['Country'],
        locationmode='country names',
        z=age_salary_gap_df['SalaryGapPercent'],
        colorscale=gap_colorscale,
        hovertemplate=gap_hovertemplate,
        visible=(i == 0),
        colorbar=go.choropleth.ColorBar(**colorbar_settings),
        zmin=-100,
        zmax=100,
    )
    traces.append(trace)

# Build one slider step per age range. Each step shows only the matching
# trace and swaps the figure title to the one for that age range.
slider_steps = []
for i, age_range in enumerate(age_bins):
    # True only at the position of this step's own trace.
    visibility = [j == i for j in range(len(traces))]
    step = dict(
        method='update',
        args=[
            dict(visible=visibility),
            dict(title=make_title(age_range)),
        ],
        label=age_range,
    )
    slider_steps.append(step)

# Describe the layout first: figure size, initial title, map colours, the age
# slider and the outer margins.
age_slider = dict(
    active=0,
    currentvalue=dict(prefix='Selected age: '),
    steps=slider_steps,
)
map_layout = go.Layout(
    width=790,
    height=640,
    title=make_title(age_bins[0]),
    geo=dict(showocean=True, oceancolor='#a8d5f2', landcolor='#ffffff'),
    sliders=[age_slider],
    margin=dict(t=80, r=20, b=80, l=20),
)

# Combine the per-age traces with the layout.
fig = go.Figure(data=traces, layout=map_layout)

# Draw country borders but no coastlines.
fig.update_geos(showcountries=True, showcoastlines=False)

# Caption with an arrow, positioned in paper coordinates just below the plot
# area so that it sits above the slider.
slider_caption = dict(
    x=0.3,
    y=-0.09,
    xref='paper',
    yref='paper',
    xanchor='left',
    yanchor='bottom',
    arrowhead=4,
    ax=30,
    ay=-20,
    arrowwidth=2,
    text='Drag the slider to different ages to see the salary gap impact',
)
fig.add_annotation(**slider_caption)

fig.show()